suppressWarnings(library(curatedOvarianData)) 
## Loading required package: affy
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
data(package="curatedOvarianData")

The TCGA_eset dataset contains gene expression data for 13,104 features (genes) across 578 samples. The data has been preprocessed and normalized using a standardized pipeline to remove batch effects and other sources of technical variability. The resulting dataset is stored as an Expression Set object in R, which includes not only the gene expression data but also sample annotation information and other metadata.

data(TCGA_eset) 
# Get gene names
gene_names <- featureNames(TCGA_eset)
head(gene_names, 10)
##  [1] "A1CF"  "A2M"   "A4GNT" "AAAS"  "AACS"  "AADAC" "AAGAB" "AAK1"  "AAMDC"
## [10] "AAMP"

Extract metadata and clinical information:

we can access this information using the pData() and fData() functions from the Biobase package.

#summary(TCGA_eset)
dim(TCGA_eset)
## Features  Samples 
##    13104      578
library(Biobase)
sample_metadata <- pData(TCGA_eset)
#head(sample_metadata)
feature_metadata <- fData(TCGA_eset)
#head(feature_metadata)

Heatmaps

suppressWarnings(library(curatedOvarianData))
suppressWarnings(library(pheatmap))


# Extract gene expression data
exprs_matrix <- exprs(TCGA_eset)

rownames(exprs_matrix) <- featureNames(TCGA_eset)

# Subset to top 20 genes that have a relatively high variance
exprs_matrix <- exprs_matrix[apply(exprs_matrix, 1, function(x) var(x) > 0.5), ][20:40, ]

# Create heatmap
pheatmap(exprs_matrix, scale = "row", clustering_distance_rows = "euclidean", show_rownames = T, show_colnames=F)

The plot shows a heatmap of the expression levels of the top 20 most variable genes in a subset of samples. Each row represents a gene, and each column represents a sample. The color scale represents the level of expression for each gene in each sample, with red indicating high expression and blue indicating low expression. The rows are clustered using euclidean distance and the columns are not clustered

suppressWarnings(library(ComplexHeatmap))
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.14.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
##    in the original pheatmap() are identically supported in the new function. You 
##    can still use the original function by explicitly calling pheatmap::pheatmap().
## 
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
## 
##     pheatmap
top_gene_expression <- exprs(TCGA_eset)
sample_metadata <- pData(TCGA_eset)
top_gene_expression_t <- t(top_gene_expression)
combined_data <- data.frame(Sample_Type = sample_metadata$sample_type, top_gene_expression_t)
#non_na_indices contain the indices of all samples that have a non-missing value
non_na_indices <- which(!is.na(sample_metadata$sample_type))
# we create a new data frame with the Sample_Type column, using only the rows corresponding to the non_na_indices.
sample_annotation_filtered <- data.frame(Sample_Type = sample_metadata$sample_type[non_na_indices])
rownames(sample_annotation_filtered) <- rownames(sample_metadata)[non_na_indices]
# new expression matrix that includes the columns for samples with non-missing Sample_Type values.
top_gene_expression_filtered <- top_gene_expression[, non_na_indices]
col_fun <- colorRampPalette(c("blue", "white", "red"))(100)

Heatmap(top_gene_expression_filtered,
        col = col_fun,
        name = "Expression",
        cluster_rows = TRUE,
        cluster_columns = TRUE,
        show_column_names = FALSE,
        show_row_names = FALSE,
        #add annotations to the heatmap based on the sample type column(healthy and tumor)
        top_annotation = HeatmapAnnotation(data = sample_annotation_filtered$Sample_Type))
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

The heatmap is showing the gene expression levels for the top variable genes in the dataset, with rows representing genes and columns representing individual samples. The color scale represents the relative expression levels of the genes, with red indicating high expression and blue indicating low expression. The samples are annotated with their sample type (tumor or healthy).This allows for visual inspection of any potential patterns or differences in gene expression between different sample types.

Correlation Plot

suppressWarnings(library(corrplot))
## corrplot 0.92 loaded
gene_expression <- exprs(TCGA_eset)
# Select a subset of genes (e.g., the first 20 genes)
selected_genes <- gene_expression[1:20, ]

# Select a subset of samples (e.g., the first 100 samples)
selected_samples <- selected_genes[, 1:100]

correlation_matrix_subset <- cor(t(selected_samples), method = "pearson")

corrplot(correlation_matrix_subset, method = "color", type = "upper", mar = c(0, 0, 1, 1))

This plot is showing the correlation between the expression levels of a subset of genes (the first 20 genes) in a subset of samples (the first 100 samples). The plot is a correlation matrix, where each cell represents the correlation between the expression levels of two genes. The color of each cell indicates the strength of the correlation, with darker colors indicating stronger positive or negative correlations.

suppressWarnings(library(corrplot))
gene_expression <- exprs(TCGA_eset)
# Select a subset of genes (e.g., the first 100 genes)
selected_genes <- gene_expression[1:100, ]

# Select a subset of samples (e.g., the first 100 samples)
selected_samples <- selected_genes[, 1:100]

correlation_matrix_subset <- cor(t(selected_samples), method = "pearson")

corrplot(correlation_matrix_subset, method = "color", type = "upper", mar = c(0, 0, 1, 1),tl.pos="n")

Network visualization

suppressWarnings(library(igraph))
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:BiocGenerics':
## 
##     normalize, path, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# Load the TCGA_eset dataset
data(TCGA_eset)

# Extract the gene expression data
tcga.exprs <- exprs(TCGA_eset)
# Subset the gene expression data to the first 30 samples
tcga.exprs <- tcga.exprs[, 1:30]

#log2 transformation can help to normalize the range of expression values and make the graph more informative.
log2.exprs <- log2(tcga.exprs + 1)

# Calculate the variance for each gene
gene.variance <- apply(log2.exprs, 1, var)

# Order genes by variance and select the top 100
top.genes <- head(order(gene.variance, decreasing = TRUE), 100)

# Subset the gene expression data with the top 50 genes
subset.exprs <- log2.exprs[top.genes, ]

# Generate a correlation matrix of the gene expression data
cor.matrix <- cor(subset.exprs)

# Create an igraph graph object
graph <- graph.adjacency(cor.matrix, mode = "undirected", weighted = TRUE)
# Remove self-loops
graph <- simplify(graph, remove.multiple = FALSE, remove.loops = TRUE)
# Set node color based on gene expression level
node.color <- ifelse(rowMeans(subset.exprs) > log2(6), "red", "blue")

# Plot the graph with node color based on gene expression level
plot(graph, vertex.size =8, vertex.color = node.color,vertex.label.cex = 0.7, edge.width = E(graph)$weight*2)
## Warning in v(graph): Non-positive edge weight found, ignoring all weights
## during graph layout.

Each node represents a gene.The edges connecting the nodes represent the correlation between the expression levels of the connected genes. The edge width is proportional to the correlation strength.The node color is determined by the average gene expression level. If the mean expression level is greater than log2(6), the node is colored red, otherwise, it’s colored blue.

suppressWarnings(library(affy))
suppressWarnings(library(circlize))
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
## 
## Attaching package: 'circlize'
## The following object is masked from 'package:igraph':
## 
##     degree
data(TCGA_eset)

# Extract gene expression data
gene_expression <- exprs(TCGA_eset)

# Calculate variance for each gene
gene_variance <- apply(gene_expression, 1, var)

# Get the indices of the top 20 genes with the highest variance
top_genes <- order(gene_variance, decreasing = TRUE)[1:20]

# Subset the gene expression data with the top 20 genes
subset_exprs <- gene_expression[top_genes, ]

# Calculate the correlation matrix of the gene expression data
correlation_matrix <- cor(t(subset_exprs))

grid.col <- setNames(rainbow(length(top_genes)), top_genes)


chordDiagram(correlation_matrix, annotationTrack = "grid", 
             preAllocateTracks = 1, 
             grid.col = grid.col,
             directional = 1, 
             direction.type = c("diffHeight", "arrows"), 
             link.arr.type = "big.arrow")

circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
  xlim = get.cell.meta.data("xlim")
  ylim = get.cell.meta.data("ylim")
  sector.name = get.cell.meta.data("sector.index")
  circos.text(CELL_META$xcenter, 
              ylim[1] + cm_h(2), 
              sector.name, 
              facing = "clockwise",
              niceFacing = TRUE, 
              adj = c(0, 0.5),
              cex = .5,
              col=grid.col[sector.name],
              font = 1)
  circos.axis(h = "bottom",
              labels.cex = .1,
              sector.index = sector.name
              )
}, bg.border = NA)

we are using a chord diagram to visualize the gene expression data of the top 6 genes and 6 selected samples. The chord diagram shows the relationship between the genes and samples by connecting them with chords. The thickness of the chords represents the strength of the correlation between the genes and samples.The colors are just used to differentiate between the different gene-sample connections in the plot. Each gene is represented by a unique color, and each connection between a gene and a sample is represented by a segment with that gene’s color.

suppressWarnings(library(GEOquery))
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
suppressWarnings(library(networkD3))

# Download the GSE113690 dataset
gse <- getGEO("GSE113690")
## Found 1 file(s)
## GSE113690_series_matrix.txt.gz
eset <- gse[[1]]

# Extract gene expression data
exprs <- exprs(eset)

# Calculate variance for each gene
gene_variance <- apply(exprs, 1, var)

# Get the indices of the top 20 genes with the highest variance
top_genes <- head(order(gene_variance, decreasing = TRUE), 20)

# Randomly select 20 samples
set.seed(1)
selected_samples <- sample(ncol(exprs), 20)

# Subset the gene expression data with the top 20 genes and the selected 20 samples
subset_exprs <- exprs[top_genes, selected_samples]

# Calculate correlation matrix
cor_mat <- cor(subset_exprs)

# Convert correlation matrix to edge list
edge_list <- as.data.frame(as.table(cor_mat))
colnames(edge_list) <- c("source", "target", "weight")

# Create the simple network

simpleNetwork(edge_list,linkColour = "blue")

This plot is showing a simple network diagram where nodes represent the top 20 genes with the highest variance and edges represent the correlation between them based on the subset of gene expression data. The thickness of the edges represents the strength of the correlation between the genes.

suppressWarnings(library(visNetwork))
library(visNetwork)
# Create a data frame with nodes and edges
nodes <- data.frame(id = 1:10, 
                    label = paste0("Node ", 1:10), 
                    value = sample(1:10, 10, replace = TRUE),
                    color = c("red", "blue", "green", "orange", "purple", 
                              "yellow", "pink", "brown", "grey", "black"))
edges <- data.frame(from = sample(1:10, 20, replace = TRUE), 
                    to = sample(1:10, 20, replace = TRUE))

# Create the network visualization
visNetwork(nodes, edges) %>% 
  visOptions(highlightNearest = TRUE, 
             nodesIdSelection = TRUE) %>% 
  visLayout(randomSeed = 123)
suppressWarnings(library(nycflights13))
suppressWarnings(library(visNetwork))
suppressWarnings(library(dplyr))
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# subset only 10 flights from each origin airport
flights_subset <- flights %>%
  group_by(origin) %>%
  sample_n(size = 10)

# create nodes and edges data frames
nodes <- data.frame(id = unique(c(flights_subset$origin, flights_subset$dest)), 
                    label = unique(c(flights_subset$origin, flights_subset$dest)))
edges <- data.frame(from = flights_subset$origin, to = flights_subset$dest)

# create the network visualization
visNetwork(nodes, edges, width = "100%") %>%
  visNodes(shape = "dot", size = 10) %>%
  visEdges(arrows = "to", smooth = TRUE) %>%
  visLayout(randomSeed = 123) 

Diagram

suppressWarnings(library(DiagrammeR))
## 
## Attaching package: 'DiagrammeR'
## The following object is masked from 'package:igraph':
## 
##     count_automorphisms
grViz("
digraph {
  
  # Set node defaults
  node [shape = diamond, style = filled, color = skyblue, fontname = 'Helvetica']

  # Add nodes to the graph
  A [label = 'Gene A']
  B [label = 'Gene B']
  C [label = 'Gene C']
  D [label = 'Gene D']
  E [label = 'Gene E']
  F [label = 'Gene F']
  G [label = 'Gene G']
  H [label = 'Gene H']
  I [label = 'Gene I']
  J [label = 'Gene J']
  K [label = 'Gene K']
  L [label = 'Gene L']
  M [label = 'Gene M']
  N [label = 'Gene N']
  
  # Set edge defaults
  edge [color = red]

  # Add edges to the graph
  A -> B
  A -> C
  B -> D
  C -> D
  C -> E
  D -> F
  E -> F
  F -> G
  F -> H
  G -> I
  H -> I
  I -> J
  I -> K
  J -> L
  K -> L
  L -> M
  L -> N
}
")

Brick plot

suppressWarnings(library(tidyverse))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.2     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%()       masks igraph::%--%()
## ✖ tibble::as_data_frame() masks dplyr::as_data_frame(), igraph::as_data_frame()
## ✖ dplyr::combine()        masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compose()        masks igraph::compose()
## ✖ tidyr::crossing()       masks igraph::crossing()
## ✖ dplyr::filter()         masks stats::filter()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ lubridate::pm()         masks affy::pm()
## ✖ ggplot2::Position()     masks BiocGenerics::Position(), base::Position()
## ✖ purrr::simplify()       masks igraph::simplify()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Generate mock data
set.seed(123)
metabolites <- paste0("Metabolite", 1:20)
n_endpoints <- sample(1:20, 100, replace = TRUE)
rank <- rank(-n_endpoints)
data <- data.frame(Metabolite = metabolites, N_adverse_events = n_endpoints, Rank = rank)

# Create the brick plot
ggplot(data, aes(x = Rank, y = N_adverse_events, fill = N_adverse_events)) +
  geom_tile() +
  geom_text(aes(label = Metabolite), size = 2, color = "white") +
  scale_fill_gradient(low = "#FDE725", high = "#440154") +
  coord_flip() +
  xlab("Rank") +
  ylab("Number of associated adverse events") +
  theme_bw()

This is a brick plot that shows the ranking of metabolites based on the number of associated adverse events. Each tile represents a metabolite, with the color indicating the number of associated adverse events. The darker the color, the more adverse events a metabolite is associated with. The tiles are ordered by their rank, which is based on the number of adverse events. The x-axis represents the rank, and the y-axis represents the number of associated endpoints. The labels on each tile indicate the name of the corresponding metabolite.

suppressWarnings(library(tidyverse))

# Load the airquality dataset
data(airquality)

# Create a summary table of the number of missing values for each variable
missing_values <- airquality %>%
  summarize_all(~sum(is.na(.))) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "N_Missing") %>%
  arrange(desc(N_Missing))

# Create the brick plot
ggplot(missing_values, aes(x = rank(-N_Missing), y = N_Missing, fill = N_Missing)) +
  geom_tile() +
  geom_text(aes(label = Variable), size = 3) +
  scale_fill_gradient(low = "yellow", high = "red") +
  coord_flip() +
  xlab("Rank") +
  ylab("Number of missing values") +
  theme_bw()

The x-axis shows the rank of each variable based on the number of missing values, and the y-axis shows the actual number of missing values. Each tile represents a variable, and its color represents the number of missing values it has. The darker the tile, the more missing values the variable has. The plot is useful for identifying which variables have the most missing values and to what extent they affect the data

Volcano plot

options(digits = 4)
suppressWarnings(library(limma))
## 
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
# Create a design matrix using your sample group information
design <- model.matrix(~ 0 + sample_metadata$sample_type)

# the linear model is fit to the gene expression data using the sample metadata information to define groups
fit <- lmFit(TCGA_eset, design)

# empirical Bayes statistics are applied to obtain t-statistics for each gene. 
#The t-statistics are used to rank the genes based on differential expression between groups
fit <- eBayes(fit)
#summary(fit)

#topTable function is to extract the top genes based on this ranking
#adjust parameter is set to "fdr" to adjust the p-values for multiple testing. 
#significance threshold is defined to identify the differentially expressed genes based on the adjusted p-value
results <- topTable(fit, number = nrow(TCGA_eset), adjust = "fdr", sort.by = "none")


head(results)
##          probeset  gene sample_metadata.sample_typehealthy
## A1CF  220951_s_at  A1CF                              3.004
## A2M     217757_at   A2M                              9.535
## A4GNT   221131_at A4GNT                              3.257
## AAAS    218075_at  AAAS                              4.894
## AACS  218434_s_at  AACS                              6.641
## AADAC   205969_at AADAC                              3.996
##       sample_metadata.sample_typetumor AveExpr      F P.Value adj.P.Val
## A1CF                             3.112   3.110  75617       0         0
## A2M                              8.792   8.803  27378       0         0
## A4GNT                            3.413   3.410 111434       0         0
## AAAS                             5.128   5.125  54033       0         0
## AACS                             6.910   6.906  41477       0         0
## AADAC                            4.903   4.890   6069       0         0
options(digits = 4)
library(ggplot2)

# Prepare the data for the volcano plot
# The log2 fold change is calculated as the difference in mean expression between two groups.
#The result is a numerical value that represents the magnitude and direction of expression between the two groups.
# e.g, if the log2 fold change for a particular gene is 1.5, it means that the mean expression level of that gene is 2^1.5 = 2.828 times higher in the tumor group than in the healthy group
#The adjusted p-value is calculated as the negative log10 of the adjusted p-value multiplied by the sign of the log2 fold change.
volcano_data <- data.frame(
  genes = results$gene,
  log2FoldChange = results$sample_metadata.sample_typetumor - results$sample_metadata.sample_typehealthy,
  pvalue = results$adj.P.Val,
  adj_pvalue = -log10(results$adj.P.Val) * sign(results$sample_metadata.sample_typetumor - results$sample_metadata.sample_typehealthy)
)

# Create the volcano plot
ggplot(volcano_data, aes(x = log2FoldChange, y = adj_pvalue)) +
  geom_point(aes(color = adj_pvalue > 0.05), alpha = 0.7) +
  scale_color_manual(values = c("blue", "red")) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  labs(x = "Log2 Fold Change", y = "-log10 adjusted p-value", title = "Differentially Expressed Genes") +
  theme_classic()

The volcano plot shows the log2 fold change in expression (x-axis) versus the negative log10 adjusted p-value (y-axis) for each gene. The blue points represent genes that are not significantly differentially expressed (adjusted p-value > 0.05), while the red points represent genes that are significantly expressed (adjusted p-value < 0.05). The dashed horizontal line represents the threshold for statistical significance at an adjusted p-value of 0.05.

Time series plot

suppressWarnings(library(dygraphs))
data(co2)
dygraph(co2, main = "Atmospheric CO2 Concentrations") %>%
  dySeries("V1") %>% #specify which series (column) in the data frame to plot in the dygraph()
  dyRangeSelector()

scatter plot

suppressWarnings(library(rbokeh))

# Create sample data
x <- rnorm(100)
y <- rnorm(100)
label <- paste("Point", 1:100 )

# Create plot
p <- figure() %>%
  ly_points(x, y, hover = list(label),color="purple")

# Show plot
p

This is a scatter plot of 100 points, with the x-coordinates and y-coordinates randomly generated from a normal distribution. when hovering over a point, a label indicating the point number is displayed

#install.packages("plotly")
suppressWarnings(library(plotly))
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:igraph':
## 
##     groups
## The following object is masked from 'package:ComplexHeatmap':
## 
##     add_heatmap
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Select two genes randomly
genes <- sample(rownames(exprs(TCGA_eset)), 2)

# Create a data frame with the gene expression values
plot_data <- data.frame(x = exprs(TCGA_eset)[genes[1], ],
                        y = exprs(TCGA_eset)[genes[2], ])

# Create plot
plot_ly(plot_data, x = ~x, y = ~y, type = "scatter", mode = "markers")

This is a scatter plot of two randomly selected genes from the TCGA_eset dataset. The x-axis represents the expression values of one gene and the y-axis represents the expression values of another gene. Each point on the plot represents a sample from the dataset, where the coordinates of the point are the expression values of the two selected genes for that sample.

MA plot

# M values is the difference in expression levels between two groups
logFC <- rnorm(nrow(TCGA_eset), mean = 0, sd = 1)
#A values (mean expression) across two groups
A_value <- rowMeans(exprs(TCGA_eset))
M_value <- logFC

ma_data <- data.frame(Gene = rownames(TCGA_eset),
                      A_value = A_value,
                      M_value = M_value)

ggplot(ma_data, aes(x = A_value, y = M_value)) +
  geom_point(alpha = 0.5) +
  xlab("A - Mean Expression") +
  ylab("M - log Fold Change") +
  theme_bw()

Each point represents a gene, and the position of the point indicates the magnitude of the log fold change and the mean expression. The plot is useful for visualizing differentially expressed genes, as genes with high absolute fold changes are plotted at the top or bottom of the plot, and genes with low mean expression are plotted towards the left or right of the plot. The alpha parameter is used to adjust the transparency of the points in the plot.the x-axis showing the average expression level (A value) and the y-axis showing the log fold change (M value) between two conditions or groups

3D scatterplot

suppressWarnings(library(plot3D))
suppressWarnings(library(rgl))
suppressWarnings(library(plotrix))
## 
## Attaching package: 'plotrix'
## The following object is masked from 'package:rgl':
## 
##     mtext3d
suppressWarnings(library(Rcmdr))
## Loading required package: splines
## Loading required package: RcmdrMisc
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:purrr':
## 
##     some
## The following object is masked from 'package:dplyr':
## 
##     recode
## Loading required package: sandwich
## Loading required package: effects
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## The Commander GUI is launched only in interactive sessions
## 
## Attaching package: 'Rcmdr'
## The following object is masked from 'package:base':
## 
##     errorCondition
# Create sample data
x <- rnorm(100)
y <- rnorm(100)
z <- rnorm(100)

# Create 3D scatterplot
scatter3d(x, y, z, main = "3D Scatterplot", xlab = "X", ylab = "Y", zlab = "Z")
## Loading required namespace: mgcv
## Warning in par3d(userMatrix = structure(c(1, 0, 0, 0, 0, 0.342020143325668, :
## font family "sans" not found, using "bitmap"

The scatterplot shows the distribution of 100 randomly generated values for the X, Y, and Z coordinates in 3D space.